rm(list = ls())

library(tidyverse)
library(stargazer)
library(lfe)
library(broom) #for tidy
library(lmtest) # For coeftest()
library(sandwich) # For sandwich()
library(estimatr)
library(texreg)
library(psych)
library(default)
library(sjmisc) #for row_sums
library(quantreg)
library(ivreg)

options(tibble.print_min = 10)
source("functions.R")

# Brother IV figure a la Edin--------------------------------------------------------------

load("./data_output/persons_years.Rdata")

# Take extra years for the top and bottom tails of the estimation window
bro_panel = dta %>%
  filter(!is.na(cog_bro)) %>% 
  filter(ika %in% 34:41) %>% 
  filter(vuosi > 1998)

# Add +-2 years worth of earnings for each individual to smooth estimates  
bro_sample = bro_panel %>% 
  bind_rows(bro_panel %>% mutate(vuosi = vuosi -2, ika = ika -2)) %>% 
  bind_rows(bro_panel %>% mutate(vuosi = vuosi -1, ika = ika -1)) %>% 
  bind_rows(bro_panel %>% mutate(vuosi = vuosi + 1, ika = ika +1)) %>% 
  bind_rows(bro_panel %>% mutate(vuosi = vuosi + 2, ika = ika +2))

# Then restrict the sample to correct years
bro_sample = bro_sample %>%
  filter(ika %in% 36:39) %>% 
  filter(vuosi > 2000, vuosi < 2016) %>% 
  mutate(inc_labor = ifelse(is.na(inc_labor), 0, inc_labor)) %>%
  mutate(inc_log = ifelse(inc_labor == 0, NA, log(inc_labor)),
         extensive = if_else(inc_log > 9 & emp_11 == T, T, F, missing = F))

# Model 1
trendsPlot = function(my_data, my_model){
  nest_mincer = my_data %>% 
    #filter(inc_prop > 8) %>% 
    group_by(vuosi) %>% nest %>% 
    mutate(mincer1 = map(data, ~feols(as.formula(my_model), data = ., cluster = ~ shnro)),
           tidied1 = map(mincer1, ~tidy(., conf.int = T)))
  
  nest_mincer_unnest = nest_mincer %>% 
    select(tidied1) %>% 
    unnest(cols = c(tidied1)) %>% ungroup %>% mutate(term = str_replace(term, "fit_", "")) %>% 
    filter(term %in% my_vars)
  
  nest_mincer_unnest %>% 
    ggplot(aes(x = vuosi, y = estimate, color = as.factor(term), shape = as.factor(term), linetype = as.factor(term))) +
    geom_point(size = 3) +
    geom_line(size = 0.6) +
    geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = as.factor(term)), alpha = 0.1, color = NA)  +
    theme_bw() +
    scale_color_manual(name = "Coefficient", labels = var_names, values = color_types) +
    scale_fill_manual(name = "Coefficient", labels = var_names, values = color_types) +
    scale_shape_manual(name = "Coefficient", labels = var_names, values = point_types) +
    scale_linetype_manual(name = "Coefficient", labels = var_names, values = line_types) +
    labs(x = "Year",
         y = "Estimate") +
    scale_y_continuous(breaks = pretty) +
    scale_x_continuous(breaks = 2000:2015, labels = paste0("'", str_sub(2000:2015, 3, 4))) +
    guides(fill = FALSE)
}

trendsPlot(bro_sample %>% filter(inc_labor!=0), 
           "inc_log ~ 1 | synvuosi | cog + noncog  ~ cog_bro + noncog_bro")
ggsave("/figures/bro_IV_inclog=cog+noncog.pdf", height = 4, width = 7)

trendsPlot(bro_sample %>% filter(inc_labor!=0), 
           "inc_log ~ 1 | synvuosi | cog + extro + consci ~ cog_bro + extro_bro + consci_bro")
ggsave("/figures/bro_IV_inclog=cog+extro+consci.pdf", height = 4, width = 7.3)



# Brother correlations ----------------------------------------------------

library(confintr)
load(file = 'data_output/persons_sample.Rdata')
bro_sample_persons = persons %>% filter(!is.na(cog_bro))


nest_mincer = bro_sample_persons %>% 
  group_by(synvuosi) %>% nest %>% 
  mutate(cog = map(data, ~ci_cor(.x$cog, .x$cog_bro) %>% unlist),
         extro = map(data, ~ci_cor(.x$extro, .x$extro_bro) %>% unlist),
         consci = map(data, ~ci_cor(.x$consci, .x$consci_bro) %>% unlist))

nest_mincer_unnest = 
  bind_rows(
    bind_rows(nest_mincer$cog) %>% mutate(synvuosi = nest_mincer$synvuosi, term = "cog"), 
    bind_rows(nest_mincer$extro) %>% mutate(synvuosi = nest_mincer$synvuosi, term = "extro"), 
    bind_rows(nest_mincer$consci) %>% mutate(synvuosi = nest_mincer$synvuosi, term = "consci")) %>% 
  mutate(across(c(interval1, interval2, estimate), ~ as.numeric(.x)))
  

nest_mincer_unnest %>% 
ggplot(aes(x = synvuosi, y = estimate, color = as.factor(term), shape = as.factor(term), linetype = as.factor(term))) +
geom_point(size = 3) +
geom_line(size = 0.6) +
geom_ribbon(aes(ymin = interval1, ymax = interval2, fill = as.factor(term)), alpha = 0.1, color = NA)  +
theme_bw() +
scale_color_manual(name = "Dimension", labels = var_names, values = color_types) +
scale_fill_manual(name = "Dimension", labels = var_names, values = color_types) +
scale_shape_manual(name = "Dimension", labels = var_names, values = point_types) +
scale_linetype_manual(name = "Dimension", labels = var_names, values = line_types) +
labs(x = "Birth Cohort",
     y = "Brother Correlation") +
scale_y_continuous(breaks = pretty) +
  scale_x_continuous(breaks = 1962:1979, labels = paste0("'", str_sub(1962:1979, 3, 4))) +
  guides(fill = FALSE)

ggsave("/figures/bro_correlations_trends.pdf", height = 4, width = 7)
    


# Cross section Brother IV -----------------------------------------------------------

load(file = 'data_output/persons_years_sample.Rdata')


bro_sample_cross = full_sample %>% 
  filter(!is.na(cog_bro)) %>% 
  filter(inc_labor != 0) %>% 
  mutate(Earnings = inc_log)

bro_sample %>% count(synvuosi) %>% print(n=100)
bro_sample %>% count(vuosi) %>% print(n=100)
  
    
models = list(
  feols(Earnings ~ 1 |  vuosi^synvuosi | cog + noncog ~ cog_bro + noncog_bro, 
        cluster = ~ shnro, data = bro_sample_cross),
  feols(Earnings ~ 1 |  vuosi^synvuosi | extro + consci ~ extro_bro + consci_bro, 
        cluster = ~ shnro, data = bro_sample_cross),
  feols(Earnings ~ 1 |  vuosi^synvuosi | cog + extro + consci ~ cog_bro + extro_bro + consci_bro, 
        cluster = ~ shnro, data = bro_sample_cross),
  feols(Earnings ~ educ_y | vuosi^synvuosi, 
        cluster = ~ shnro, data = bro_sample_cross),
  feols(Earnings ~ educ_y |  vuosi^synvuosi | cog + extro + consci ~ cog_bro + extro_bro + consci_bro, 
        cluster = ~ shnro, data = bro_sample_cross)
)


outcome_mean = unlist(lapply(models, function(x)(model.matrix(x, type = "lhs")) %>% mean)) %>% round(2)

etable(models,
       file = "/tables/personality_earnings_bro.tex",
       title = "Returns to Skills",
       headers = list("Dependent variable: log(Earnings)" = 5),
       label = "tab:earnings",
       tex = T,
       extralines = list("_Dep. Mean" = outcome_mean),
       tpt = T,
       style.tex = style.tex(#"aer",
         tablefoot.value = "\\input{tables/notes_personality_earnings_bro.tex}")
)  
 



# Introduce noise ---------------------------------------------------------

source("functions.R")

noise_sample = full_sample %>% 
  mutate(noncog_noise1 = noncog + rnorm(nrow(full_sample), 0, (synvuosi-1962)*0.02),
         noncog_noise2 = noncog + rnorm(nrow(full_sample), 0, (synvuosi-1962)*0.04),
         noncog_noise3 = noncog + rnorm(nrow(full_sample), 0, 0.2),
         noncog_noise4 = noncog + rnorm(nrow(full_sample), 0, 1))

trendsPlot(noise_sample %>% filter(inc_labor!=0, !is.na(noncog)), 
           "inc_log ~ cog + noncog")

trendsPlot(noise_sample %>% filter(inc_labor!=0, !is.na(noncog)), 
           "inc_log ~ cog + noncog_noise1")
ggsave("/figures/panel_inclog=cog+noncog_noise2.pdf", height = 4, width = 7)


trendsPlot(noise_sample %>% filter(inc_labor!=0, !is.na(noncog)), 
           "inc_log ~ cog + noncog_noise2")
ggsave("/figures/panel_inclog=cog+noncog_noise4.pdf", height = 4, width = 7)


trendsPlot(noise_sample %>% filter(inc_labor!=0, !is.na(noncog)), 
           "inc_log ~ cog + noncog_noise3")
ggsave("/figures/panel_inclog=cog+noncog_noise20.pdf", height = 4, width = 7)


trendsPlot(noise_sample %>% filter(inc_labor!=0, !is.na(noncog)), 
           "inc_log ~ cog + noncog_noise4")
ggsave("/figures/panel_inclog=cog+noncog_noise100.pdf", height = 4, width = 7)



# # Bro IV figure a la Ramin  ---------------------------------------------------------------------
# 
# load(file = 'data_output/persons_years_sample.Rdata')
# 
# 
# bro_sample = full_sample %>% 
#   filter(!is.na(cog_bro))
# bro_sample %>% count(synvuosi) %>% print(n=100)
# bro_sample %>% count(vuosi) %>% print(n=100)
# 
# bro_sample_pos = bro_sample %>% filter(inc_labor != 0)
# 
# # Model 1
# trendsPlot = function(my_data, my_model){
#   nest_mincer = my_data %>% 
#     #filter(inc_prop > 8) %>% 
#     group_by(vuosi) %>% nest %>% 
#     mutate(mincer1 = map(data, ~feols(as.formula(my_model), data = ., vcov = "hetero")),
#            tidied1 = map(mincer1, ~tidy(., conf.int = T)))
#   
#   nest_mincer_unnest = nest_mincer %>% 
#     select(tidied1) %>% 
#     unnest(cols = c(tidied1)) %>% ungroup %>% mutate(term = str_replace(term, "fit_", "")) %>% 
#     filter(term %in% my_vars)
#   
#   nest_mincer_unnest %>% 
#     ggplot(aes(x = vuosi, y = estimate, color = as.factor(term), shape = as.factor(term), linetype = as.factor(term))) +
#     geom_point(size = 3) +
#     geom_line(size = 0.6) +
#     geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = as.factor(term)), alpha = 0.1, color = NA)  +
#     theme_bw() +
#     scale_color_manual(name = "Coefficient", labels = var_names, values = color_types) +
#     scale_fill_manual(name = "Coefficient", labels = var_names, values = color_types) +
#     scale_shape_manual(name = "Coefficient", labels = var_names, values = point_types) +
#     scale_linetype_manual(name = "Coefficient", labels = var_names, values = line_types) +
#     labs(x = "Year",
#          y = "Value") +
#     scale_y_continuous(breaks = pretty) +
#     scale_x_continuous(breaks = 2000:2015, labels = paste0("'", str_sub(2000:2015, 3, 4))) +
#     guides(fill = FALSE)
# }
# 
# trendsPlot(bro_sample_pos, 
#            "inc_log ~ 1 | synvuosi | cog + noncog  ~ cog_bro + noncog_bro")
# ggsave("/figures/bro_IV_inclog=cog+noncog.pdf", height = 4, width = 7)
# 
# trendsPlot(bro_sample_pos, 
#            "inc_log ~ 1 | synvuosi | extro + consci  ~ extro_bro + consci_bro")
# ggsave("/figures/bro_IV_inclog=extro+consci.pdf", height = 4, width = 7)
# 
# trendsPlot(bro_sample_pos, 
#            "inc_log ~ 1 | synvuosi | cog + extro + consci ~ cog_bro + extro_bro + consci_bro")
# ggsave("/figures/bro_IV_inclog=cog+extro+consci.pdf", height = 4, width = 7)
# 
# trendsPlot(bro_sample_pos, 
#            "inc_log ~ educ_y | synvuosi | cog + extro + consci  ~ cog_bro + extro_bro + consci_bro")
# ggsave("/figures/bro_IV_inclog=cog+extro+consci+educ_y.pdf", height = 4, width = 7)
# 
# 
# trendsPlot(bro_sample, "educ_y ~ 1 | synvuosi | cog + extro + consci  ~cog_bro + extro_bro + consci_bro")
# ggsave("/figures/bro_IV_educ_y=cog+consci+extro.pdf", height = 4, width = 7)
# trendsPlot(bro_sample, "educ_y ~ 1 | synvuosi | cog + noncog  ~ cog_bro + noncog_bro")
# ggsave("/figures/bro_IV_educ_y=cog+noncog.pdf", height = 4, width = 7)
# 
# 
# # Baseline trends in bro sample
# trendsPlot(bro_sample_pos, 
#            "extensive ~  cog + extro + consci | synvuosi")
# 
# 

# Brother IV  with linear trends ------------------------------------------
# 
# p2 = feols(inc_log ~ vuosi | synvuosi | cog + noncog + cog:vuosi + noncog:vuosi  ~ 
#              cog_bro + noncog_bro + cog_bro:vuosi + noncog_bro:vuosi, data = bro_sample) %>% summary
# 
# 
# 
# model1 = feols(inc_log ~ vuosi | synvuosi | cog + extro + consci + cog:vuosi + extro:vuosi + consci:vuosi  ~ 
#                  cog_bro + extro_bro + consci_bro + cog_bro:vuosi + extro_bro:vuosi + consci_bro:vuosi, data = bro_sample, cluster =  ~ shnro) %>% summary
# 
# 
# model1 = model1 %>% tidy(., conf.int = T) %>% mutate(term = str_replace(term, "fit_", "")) 
# 
# cog_slope = model1 %>% filter(term == "cog:vuosi") %>% pull(estimate)
# cog_intercept =  model1 %>% filter(term == "cog") %>% pull(estimate)
# extro_slope = model1 %>% filter(term == "extro:vuosi") %>% pull(estimate)
# extro_intercept =  model1 %>% filter(term == "extro") %>% pull(estimate)
# consci_slope = model1 %>% filter(term == "consci:vuosi") %>% pull(estimate)
# consci_intercept =  model1 %>% filter(term == "consci") %>% pull(estimate)
# conf_low = model1$conf.low
# 
# p1_plot = tibble(vuosi = 2001:2015,
#                  cog = cog_intercept + cog_slope*vuosi,
#                  extro = extro_intercept + extro_slope*vuosi,
#                  consci = consci_intercept + consci_slope*vuosi) 
# p1_plot = p1_plot %>% pivot_longer(c(cog, extro, consci)) %>% 
#   mutate(conf.low = case_when(name == "cog" ~ value - 2*model1$std.error[model1$term == "cog:vuosi"],
#                               name == "consci" ~ value - 2*model1$std.error[model1$term == "consci:vuosi"],
#                               name == "extro" ~ value - 2*model1$std.error[model1$term == "extro:vuosi"])) %>% 
#   mutate(conf.high = case_when(name == "cog" ~ value + 2*model1$std.error[model1$term == "cog:vuosi"],
#                                name == "consci" ~ value + 2*model1$std.error[model1$term == "consci:vuosi"],
#                                name == "extro" ~ value + 2*model1$std.error[model1$term == "extro:vuosi"]))
# 
# p1_plot %>% ggplot(aes(vuosi, value)) +
#   geom_line(aes(color = name)) +
#   geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = as.factor(name)), alpha = 0.1, color = NA)
# 
# 
# p1_plot %>% ggplot(aes(2001:2015, c(0,1))) +
#   geom_abline(slope = cog_slope,
#               intercept = cog_intercept + 2001*cog_slope)
# 
# test = predict(model1, newdata = tibble(vuosi = 2001:2015, cog = rep(1,15), extro = rep(0,15), consci = rep(0,15), synvuosi = rep(1970, 15)),
#                interval = "confidence")

